Workflow

The plague-phylogeography Snakemake pipeline.

Click the nodes to obtain details about each step.

Alignment

Snippy Pairwise

Post-Alignment

Qualimap

Quality Control

MultiQC

Statistics

If the workflow was executed on a cluster or in the cloud, the reported runtimes include the time spent waiting in the queue.

Configuration

Configuration files
File Code
/2/scratch/keaton/plague-phylogeography/config/snakemake.yaml
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Snakemake Configuration File
#
# Flat key/value settings for the plague-phylogeography pipeline.
# Lines starting with '#' followed by a key are disabled alternatives kept
# for quick switching (e.g. smaller test datasets).

# Conda Environments
#conda_eager_env : "nf-core-eager-2.2.0dev"

# SQLITE Parameters
# Database file name and the SELECT statements run against its "Master" table.
# The *_asm, *_sra and *_ref queries filter rows by the BioSampleComment column.
sqlite_db : "yersinia_pestis_db.sqlite"
sqlite_select_command_asm : "SELECT AssemblyFTPGenbank FROM Master WHERE (BioSampleComment LIKE '%KEEP%Assembly%')"
sqlite_select_command_sra : "SELECT BioSampleAccession,SRARunAccession,SRALibraryLayout,SRAFileURL FROM Master WHERE (BioSampleComment LIKE '%KEEP: EAGER Ancient%')"
# Disabled alternatives: restrict the SRA query to specific run or biosample accessions.
#sqlite_select_command_sra : "SELECT BioSampleAccession,SRARunAccession,SRALibraryLayout,SRAFileURL FROM Master WHERE (SRARunAccession = 'SRR1048902' OR SRARunAccession = 'SRR1048905')"
#sqlite_select_command_sra : "SELECT BioSampleAccession,SRARunAccession,SRALibraryLayout,SRAFileURL FROM Master WHERE (SRABioSampleAccession = 'SAMEA3937653')"
sqlite_select_command_ref : "SELECT AssemblyFTPGenbank FROM Master WHERE (BioSampleComment LIKE '%Reference%')"
# Presumably upper bounds on how many assembly / SRA datasets are processed — confirm in the rules that read them.
max_datasets_assembly : 500
#max_datasets_assembly : 3
#max_datasets_sra : 2
max_datasets_sra : 100

# Miscellaneous filtering
# NOTE(review): units are not stated here; presumably percent identity and minimum length (bp) for repeat detection — confirm.
detect_repeats_threshold : 90
detect_repeats_length : 50

# Reference locus coordinates.
# NOTE(review): start/end are quoted strings, unlike the other numeric settings in this file — confirm consumers expect strings.
reference_locus : "AL590842"
reference_locus_name : "chromosome"
reference_locus_start : "0"
reference_locus_end : "4653728"

# nf-core/eager parameters
# eager_rev selects the pipeline revision; the pinned commit below is disabled in favour of "dev".
#eager_rev : "7b51863957"
eager_rev: "dev"
# Presumably forwarded to eager's read-length clipping and bwa aln (-n / -l) options — verify against the eager rule.
eager_clip_readlength : 35
eager_bwaalnn : 0.01
eager_bwaalnl : 16
organism : "Yersinia pestis"

# Snippy Parameters
# Depth, quality and fraction thresholds plus masking/filtering settings for the snippy steps.
# NOTE(review): exact semantics of each key depend on the rules that consume them (not visible here).
snippy_ctg_depth : 10
snippy_bam_depth : 3
snippy_base_qual : 20
snippy_map_qual : 30
snippy_min_frac : 0.9
snippy_mask_char : "X"
snippy_missing_data : 0
snippy_snp_density : 10

# IQTREE
# Disabled: fixed substitution model (written in the same "key : value" form as the rest of the file).
#iqtree_model : "K3Pu+F+I"
iqtree_seed : "47321424" # keeping it consistent in a config file allows for checkpointing
iqtree_outgroup : "Reference"
# Disabled alternative: comma-separated list of explicit outgroup sample names.
#iqtree_outgroup : "GCA_000323485.1_ASM32348v1_genomic,GCA_000323845.1_ASM32384v1_genomic"
# Extra command-line flags; presumably appended verbatim to the iqtree call — confirm in the rule.
iqtree_other : "--ufboot 1000 --alrt 1000"
iqtree_runs : 1

Loading...